Total Confirmed Cases

Age vs Total Cases

point <- format_format(big.mark = " ", decimal.mark = ",", scientific = FALSE)
grid.arrange(
 
    age %>% 
    ggplot(aes(x = age_group, y = total_cases, fill = age_group)) +
    geom_bar(stat = "identity", width = 0.5) +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Age") +
    ylab("Total cases") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
          scale_y_continuous(labels = point),
    

    age %>% 
    ggplot(aes(x = total_cases, fill = age_group)) +
    geom_histogram(aes(y = ..density..), alpha = 0.5, bins = 50) +
    geom_density(alpha = 0.3, aes(color = age_group)) +
    scale_color_viridis(discrete = TRUE) +
    labs(x = "Total cases",
         y = "Density") +
    theme(legend.position = "right") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
    scale_x_continuous(labels = point) +
    scale_y_continuous(labels = point),
    
    
    age %>%
    ggplot(aes(x = age_group, y = total_cases, fill = age_group)) +
    geom_boxplot(alpha = 0.5) +
    geom_hline(yintercept = median(age$total_cases, na.rm = T), color = "red", size = 0.4, lty = "dashed") +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Age") +
    ylab("Total cases") +
        theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
          scale_y_continuous(labels = point),


layout_matrix = rbind(c(1, 1, 1, 2, 2, 2, 2),
                      c(1, 1, 1, 2, 2, 2, 2),
                      c(1, 1, 1, 2, 2, 2, 2),
                      c(3, 3, 3, 2, 2, 2, 2),
                      c(3, 3, 3, 2, 2, 2, 2)
))

# Highest Total cases
age_17 = 
  age %>% 
  filter(age_group == "0-17") %>% 
  arrange(date) %>% 
  mutate(lag = lag(total_cases)) %>% 
  mutate(growth_perc = (((total_cases - lag) / total_cases)) * 100)


age_49 =
  age %>% 
  filter(age_group == "18-49") %>% 
  arrange(date) %>% 
  mutate(lag = lag(total_cases)) %>% 
  mutate(growth_perc = (((total_cases - lag) / total_cases)) * 100) 

age_64 = 
  age %>% 
  filter(age_group == "50-64") %>% 
  arrange(date) %>% 
  mutate(lag = lag(total_cases)) %>% 
  mutate(growth_perc = (((total_cases - lag) / total_cases)) * 100)

age_65 =
  age %>% 
  filter(age_group == "65+") %>% 
  arrange(date) %>% 
  mutate(lag = lag(total_cases)) %>% 
  mutate(growth_perc = (((total_cases - lag) / total_cases)) * 100)

age_all = 
    rbind(age_17, age_49, age_64, age_65) %>% 
    arrange(desc(growth_perc)) %>% 
    select(date, age_group, total_cases, growth_perc)

head(age_all) %>% 
    knitr::kable(
    caption = "Highest total cases growth rate by age",
    col.names = c("Date", "Age", "Total cases", "Growth rate"),
    digits = 2
  )
Highest total cases growth rate by age
Date Age Total cases Growth rate
2020-04-29 0-17 1398 9.66
2020-04-27 0-17 1190 8.82
2020-04-23 0-17 936 8.65
2020-05-05 0-17 1937 8.47
2022-01-09 0-17 945336 8.29
2022-01-16 0-17 1158294 7.62
age_all_low = 
    rbind(age_17, age_49, age_64, age_65) %>% 
    arrange(growth_perc) %>% 
    select(date, age_group, total_cases, growth_perc)

head(age_all_low) %>% 
    knitr::kable(
    caption = "Lowest total cases growth rate by age",
    col.names = c("Date", "Age", "Total cases", "Growth rate"),
    digits = 2
  )
Lowest total cases growth rate by age
Date Age Total cases Growth rate
2021-06-29 65+ 391708 -0.17
2021-06-29 50-64 703990 -0.13
2021-06-29 18-49 2127853 -0.11
2021-06-29 0-17 484599 -0.10
2021-04-23 65+ 384595 -0.01
2020-12-23 0-17 234174 0.00

[Text] Note: the red line of boxplot is median

Gender vs Total Cases

point <- format_format(big.mark = " ", decimal.mark = ",", scientific = FALSE)
grid.arrange(
 
    gender %>% 
    ggplot(aes(x = gender, y = total_cases, fill = gender)) +
    geom_bar(stat = "identity", width = 0.5) +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Gender") +
    ylab("Total cases") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
    scale_y_continuous(labels = point),

    gender %>% 
    ggplot(aes(x = total_cases, fill = gender)) +
    geom_histogram(aes(y = ..density..), alpha = 0.5, bins = 30) +
    geom_density(alpha = 0.3, aes(color = gender)) +
    scale_color_viridis(discrete = TRUE) +
    labs(x = "Total cases",
         y = "Density") +
    theme(legend.position = "right") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
    scale_x_continuous(labels = point) +
    scale_y_continuous(labels = point) +
    scale_fill_viridis(discrete = TRUE),
    
    
    gender %>%
    ggplot(aes(x = gender, y = total_cases, fill = gender)) +
    geom_boxplot(alpha = 0.5) +
    geom_hline(yintercept = median(gender$total_cases, na.rm = T), color = "red", size = 0.4, lty = "dashed") +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Gender") +
    ylab("Total cases") +
        theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
          scale_y_continuous(labels = point),


layout_matrix = rbind(c(1, 1, 1, 2, 2, 2, 2),
                      c(1, 1, 1, 2, 2, 2, 2),
                      c(1, 1, 1, 2, 2, 2, 2),
                      c(3, 3, 3, 2, 2, 2, 2),
                      c(3, 3, 3, 2, 2, 2, 2)
))

gender_M =
  gender %>% 
  filter(gender == "Male") %>% 
  arrange(date) %>% 
  mutate(lag = lag(total_cases)) %>% 
  mutate(growth_perc = (((total_cases - lag) / total_cases)) * 100)

gender_F =
  gender %>% 
  filter(gender == "Female") %>% 
  arrange(date) %>% 
  mutate(lag = lag(total_cases)) %>% 
  mutate(growth_perc = (((total_cases - lag) / total_cases)) * 100)

gender_all =
    rbind(gender_M, gender_F) %>%
    arrange(desc(growth_perc)) %>% 
    select(date, gender, total_cases, growth_perc)

head(gender_all) %>% 
    knitr::kable(
    caption = "Highest total cases growth rate by gender",
    col.names = c("Date", "Gender", "Total cases", "Growth rate"),
    digits = 2
  )
Highest total cases growth rate by gender
Date Gender Total cases Growth rate
2020-04-29 Male 24372 5.44
2022-01-09 Female 3034425 5.31
2020-04-23 Female 19394 5.15
2020-04-24 Female 20395 4.91
2022-01-09 Male 2813232 4.89
2022-01-16 Female 3445681 4.80
gender_all_1 =
    rbind(gender_M, gender_F) %>%
    arrange(growth_perc) %>% 
    select(date, gender, total_cases, growth_perc)
head(gender_all_1) %>% 
    knitr::kable(
    caption = "Lowest total cases growth rate by gender",
    col.names = c("Date", "Gender", "Total cases", "Growth rate"),
    digits = 2
  )
Lowest total cases growth rate by gender
Date Gender Total cases Growth rate
2021-06-29 Male 1774418 -0.12
2021-06-29 Female 1884983 -0.11
2020-12-23 Male 945758 0.00
2020-12-30 Male 1064781 0.00
2020-12-23 Female 993649 0.00
2020-12-30 Female 1121071 0.00

[Text]

Race vs Total Cases

point <- format_format(big.mark = " ", decimal.mark = ",", scientific = FALSE)
grid.arrange(
 
    race %>%
    mutate(race_group = recode(race_group, "American Indian or Alaska Native" = "Indian/Alaska",
           "Native Hawaiian and other Pacific Islander" = "Hawaiian/Islander")) %>% 
    mutate(race_group = fct_reorder(race_group, total_cases)) %>%
    ggplot(aes(x = race_group, y = total_cases, fill = race_group)) +
    geom_bar(stat = "identity", width = 0.5) +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Race") +
    ylab("Total cases") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
    scale_y_continuous(labels = point) +
    theme(axis.title.x = element_blank(),
         axis.text.x = element_blank(),
         axis.ticks.x = element_blank()),
    

    race %>% 
    mutate(race_group = recode(race_group, "American Indian or Alaska Native" = "Indian/Alaska",
           "Native Hawaiian and other Pacific Islander" = "Hawaiian/Islander")) %>% 
    mutate(race_group = fct_reorder(race_group, total_cases)) %>%
    ggplot(aes(x = total_cases, fill = race_group)) +
    geom_histogram(aes(y = ..density..), alpha = 0.5, bins = 30) +
    geom_density(alpha = 0.1, aes(color = race_group)) +
    scale_color_viridis(discrete = TRUE) +
    labs(x = "Total cases",
         y = "Density") +
    theme(legend.position = "right") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) + 
    xlim(0, 2.5e6) +   
    ylim(0, 1.5e-5),
   
    


race %>%
    mutate(race_group = fct_reorder(race_group, total_cases)) %>% 
    mutate(race_group = recode(race_group, "American Indian or Alaska Native" = "Indian/Alaska",
           "Native Hawaiian and other Pacific Islander" = "Hawaiian/Islander")) %>% 
    ggplot(aes(x = race_group, y = total_cases, fill = race_group)) +
    geom_boxplot(alpha = 0.5) +
    geom_hline(yintercept = median(race$total_cases, na.rm = T), color = "red", size = 0.4, lty = "dashed") +
    ylim(0, 30000) +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Race") +
    ylab("Total cases") +
        theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
          scale_y_continuous(labels = point) +
    theme(axis.text.x = element_text(angle = 60, hjust = 1)),
 


layout_matrix = rbind(c(1, 1, 1, 2, 2, 2, 2),
                      c(1, 1, 1, 2, 2, 2, 2),
                      c(1, 1, 1, 2, 2, 2, 2),
                      c(3, 3, 3, 2, 2, 2, 2),
                      c(3, 3, 3, 2, 2, 2, 2)
))

(table for race and demo have not finished yet)

[Text]

Area vs Total Cases

point <- format_format(big.mark = " ", decimal.mark = ",", scientific = FALSE)
grid.arrange(
 
    demo %>%
    mutate(county_name = fct_reorder(county_name, cumulative_cases)) %>%
    ggplot(aes(x = county_name, y = cumulative_cases, fill = county_name)) +
    geom_bar(stat = "identity", width = 0.5) +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Area") +
    ylab("Total cases") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
    scale_y_continuous(labels = point) +
    theme(axis.title.x = element_blank(),
         axis.text.x = element_blank(),
         axis.ticks.x = element_blank()),
    

    demo %>% 
    mutate(county_name = fct_reorder(county_name, cumulative_cases)) %>%
    ggplot(aes(x = cumulative_cases, fill = county_name)) +
    geom_histogram(aes(y = ..density..), alpha = 0.5, bins = 50) +
    geom_density(alpha = 0.3, aes(color = county_name)) +
    scale_color_viridis(discrete = TRUE) +
    labs(x = "Total cases",
         y = "Density") +
    theme(legend.position = "bottom") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    xlim(0, 40000) +
    ylim(0, 0.005) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)),

   
    


demo %>%
    mutate(county_name = fct_reorder(county_name, cumulative_cases)) %>% 
    ggplot(aes(x = county_name, y = cumulative_cases, fill = county_name, color = "transparent")) +
    geom_boxplot(alpha = 0.5) +
    geom_hline(yintercept = median(demo$cumulative_cases, na.rm = T), color = "red", size = 0.4, lty = "dashed") +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Area") +
    ylab("Total cases") +
        theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
          scale_y_continuous(labels = point) +
    ylim(0, 40000) +
    theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()),
    


layout_matrix = rbind(c(1, 1, 1, 1, 2, 2, 2),
                      c(1, 1, 1, 1, 2, 2, 2),
                      c(3, 3, 3, 3, 2, 2, 2),
                      c(3, 3, 3, 3, 2, 2, 2),
                      c(3, 3, 3, 3, 2, 2, 2)
))

Death Rate

Age vs Death Rate

age %>% 
    mutate(date = factor(date)) %>%
    mutate(text_label = str_c("Date: ", date, 
                              "\n Age: ", age_group,
                              "\n Death(%): ", percent_deaths)) %>%
    plot_ly(y = ~percent_deaths, 
          x = ~date, 
          color = ~age_group, 
          width = 950,
          height = 300, 
          type = "scatter",
          mode = "markers",
          marker = list(size = 3),
          colors = "inferno",
          text = ~ text_label) %>%
    layout(xaxis = list(
           title = "Date",
           tickangle = 60),
           yaxis = list(
           title = "Death Rate"))

[Text]

Gender vs Death Rate

gender %>% 
    mutate(date = factor(date)) %>%
    mutate(text_label = str_c("Date: ", date, 
                              "\n Gender: ", gender,
                              "\n Death(%): ", percent_deaths)) %>%
    plot_ly(y = ~percent_deaths, 
          x = ~date, 
          color = ~gender, 
          width = 950,
          height = 300, 
          type = "scatter",
          mode = "markers",
          marker = list(size = 3),
          colors = "viridis",
          text = ~ text_label) %>%
    layout(xaxis = list(
           title = "Date",
           tickangle = 60),
           yaxis = list(
           title = "Death Rate"))

[Text]

Race vs Death Rate

race %>% 
    mutate(date = factor(date)) %>%
    mutate(race_group = recode(race_group, "American Indian or Alaska Native" = "Indian/Alaska",
           "Native Hawaiian and other Pacific Islander" = "Hawaiian/Islander")) %>% 
    mutate(text_label = str_c("Date: ", date, 
                              "\n Race: ", race_group,
                              "\n Death(%): ", percent_deaths)) %>%
    plot_ly(y = ~percent_deaths, 
          x = ~date, 
          color = ~race_group, 
          width = 950,
          height = 300, 
          type = "scatter",
          mode = "markers",
          marker = list(size = 3),
          colors = "inferno",
          text = ~ text_label) %>%
    layout(xaxis = list(
           title = "Date",
           tickangle = 60),
           yaxis = list(
           title = "Death Rate"))

[Text]

Area vs Death Rate

demo %>% 
    mutate(percent_deaths = (cumulative_deaths / cumulative_cases) * 100) %>%
    mutate(date = factor(date)) %>%
    mutate(text_label = str_c("Date: ", date, 
                              "\n Area: ", county_name,
                              "\n Death(%): ", percent_deaths)) %>%
    plot_ly(y = ~percent_deaths, 
          x = ~date, 
          color = ~county_name , 
          width = 950,
          height = 500, 
          type = "scatter",
          mode = "markers",
          marker = list(size = 3),
          colors = "inferno",
          text = ~ text_label) %>%
    layout(xaxis = list(
           title = "Date",
           tickangle = 60),
           yaxis = list(
           title = "Death Rate",
           range = c(0, 13)))

[Text: Note: states users can use our dashboard to research this]